# This script provides internal helper functions to format and output tables

# Output table to LaTeX file
myround <- function(vec, dig){return(formatC(round(as.numeric(as.character(vec)),dig),dig,format="f"))}
ptable <- function(contents, file){
  print(xtable(contents), type='latex', sanitize.text.function=identity, include.rownames = FALSE, include.colnames = FALSE, 
        file = file, only.contents = TRUE, hline.after = NULL, add.to.row = addtorow)
}

# This function takes a list of regression models and returns a table with raw and Holm-adjusted coefficients for each model
tab_func <- function(mods,amce_acie_resp = "amce"){
  coefs <- c()
  se <- c()
  coefs_adj <- c()
  se_adj <- c()
  
  if(amce_acie_resp == "amce"){
    for(m in mods){
      coefs_m <- m$amce$Estimate
      se_m <- m$amce$`Std. Err`
      coefs <- c(coefs, coefs_m*100)
      se <- c(se, se_m*100)
      coefs_adj <- c(coefs_adj, coefs_m*100)
      se_adj <- c(se_adj,abs(coefs_m/qnorm(1-(p.adjust(m$amce$`Pr(>|z|)`,method = "holm")/2))*100))
    }
  } else if (amce_acie_resp == "acie"){
    for(m in mods){
      coefs_m <- c(m$amce$Estimate,m$acie$Estimate)
      se_m <- c(m$amce$`Std. Err`,m$acie$`Std. Err`)
      coefs <- c(coefs, coefs_m*100)
      se <- c(se, se_m*100)
      coefs_adj <- c(coefs_adj, coefs_m*100)
      se_adj <- c(se_adj, abs(coefs_m/qnorm(1-(p.adjust(c(m$amce$`Pr(>|z|)`,m$acie$`Pr(>|z|)`),method = "holm")/2))*100))
    }
    acie_idx <- grep(as.vector(unlist(sapply(mods,function(x){c(x$amce$Level,x$acie$Level)}))),pattern="[:]")
    coefs <- coefs[acie_idx]
    se <- se[acie_idx]
    coefs_adj <- coefs_adj[acie_idx]
    se_adj <- se_adj[acie_idx]
  } else if (amce_acie_resp == "resp"){
    for(m in mods){
      int_idx <- names(m)[(which(names(m)=="baselines_amce")+1):(which(names(m)=="table_values_amce")-1)]
      coefs_m <- as.vector(unlist(sapply(m[int_idx],function(y){y$Estimate})))
      se_m <- as.vector(unlist(sapply(m[int_idx],function(y){y$`Std. Err`})))
      coefs <- c(coefs, coefs_m*100)
      se <- c(se, se_m*100)
      coefs_adj <- c(coefs_adj,coefs_m*100)
      se_adj <- c(se_adj,abs(coefs_m/qnorm(1-(p.adjust(as.vector(unlist(sapply(m[int_idx],function(y){y$`Pr(>|z|)`}))),method = "holm")/2))*100))
    }
  }
  z <- abs(coefs/se)
  z_adj <- abs(coefs_adj/se_adj)
  stars <- rep("",length(coefs)); stars[z>qnorm(1-0.1/2)] <- "^{+}"; stars[z>qnorm(1-0.05/2)] <- "^{*}"; stars[z>qnorm(1-0.01/2)] <- "^{**}"; stars[z>qnorm(1-0.001/2)] <- "^{***}"
  stars_adj <- rep("",length(coefs_adj)); stars_adj[z_adj>qnorm(1-0.1/2)] <- "^{+}"; stars_adj[z_adj>qnorm(1-0.05/2)] <- "^{*}"; stars_adj[z_adj>qnorm(1-0.01/2)] <- "^{**}"; stars_adj[z_adj>qnorm(1-0.001/2)] <- "^{***}"
  coefs <- paste(myround(coefs,2),stars,sep="")
  se <- paste("(",myround(se,2),")",sep="")
  coefs_adj <- paste(myround(coefs_adj,2),stars_adj,sep="")
  se_adj <- paste("(",myround(se_adj,2),")",sep="")
  return(tibble(coefs,se,coefs_adj,se_adj))
}

# Reformat table data for plotting 
tab_to_plotdat_func <- function(tab, adjustment = "none"){
  # Remove undesired estimates
  if(adjustment == "none"){
    tab <- tab[,-which(str_detect(colnames(tab),pattern = "eff_adj$|fea_adj$|pro_adj$"))]
  } else if (adjustment == "holm"){
    tab <- tab[,-which(str_detect(colnames(tab),pattern = "eff$|fea$|pro$"))]
  }
  
  # Remove LaTeX formatting
  for(c in colnames(tab)[-which(str_detect(colnames(tab),pattern = "eff|fea|pro"))]){
    tab[,c] <- str_extract(unlist(tab[,c]),pattern = "[{](.{0,})[}](.[^[{]]{0,}){0,}$")
    tab[,c] <- str_remove_all(unlist(tab[,c]),pattern = "[{]1[}][{][c|l][}]|[{]|[}]")
  }
  
  if(adjustment == "none"){
    tab$eff <- as.numeric(str_extract(tab$eff,pattern = "-{0,}[0-9]{1,}[.][0-9][0-9]"))
    tab$fea <- as.numeric(str_extract(tab$fea,pattern = "-{0,}[0-9]{1,}[.][0-9][0-9]"))
    tab$pro <- as.numeric(str_extract(tab$pro,pattern = "-{0,}[0-9]{1,}[.][0-9][0-9]"))
  } else if (adjustment == "holm"){
    tab$eff_adj <- as.numeric(str_extract(tab$eff_adj,pattern = "-{0,}[0-9]{1,}[.][0-9][0-9]"))
    tab$fea_adj <- as.numeric(str_extract(tab$fea_adj,pattern = "-{0,}[0-9]{1,}[.][0-9][0-9]"))
    tab$pro_adj <- as.numeric(str_extract(tab$pro_adj,pattern = "-{0,}[0-9]{1,}[.][0-9][0-9]"))
    
    tab <- tab %>%
      rename(eff = eff_adj,
             fea = fea_adj,
             pro = pro_adj)
  }
  
  # Remove summary stats rows
  tab <- tab[-which(str_detect(unlist(tab[,1]),pattern = "^N[(]")),]
  
  # Add omitted row information
  for(c in colnames(tab)[-which(str_detect(colnames(tab),pattern = "eff|fea|pro"))]){
    tab[,c] <- na.locf(unlist(tab[,c]))
  }
  
  # Place SEs in separate columns and make long
  tab <- left_join(tab[seq(1,nrow(tab),by=2),] %>%
                     pivot_longer(-c(1:(ncol(tab)-3)),names_to = "type",values_to = "est"),
                   tab[seq(2,nrow(tab),by=2),] %>%
                     pivot_longer(-c(1:(ncol(tab)-3)),names_to = "type",values_to = "se")) %>%
    mutate(type = case_when(type=="eff" ~ "Effective",
                            type=="fea" ~ "Feasible",
                            type=="pro" ~ "Propose"))
  
  # Reformat percentiles as necessary
  if("level" %in% colnames(tab)){
    if(any(str_detect(tab$level,pattern = "[0-9]"))){
      tab$level <- paste0("P",str_extract(tab$level,pattern = "[0-9]{2}"))
    }
  }
  
  return(tab)
}


# Produce tables for heterogeneous treatment effects
tab_hte_func <- function(mods,mods_vecs,level_values){
  tab <- tibble(vars = rep(apply(tibble(attribute = c("\\textbf{Instrument}: ","\\textbf{Coverage}: ","\\textbf{Revenue use}: ","\\textbf{Revenue use}: "),
                                        vars = mods[[1]][[3]]$Level), 1,function(x){paste0(x,collapse = "")}),each = 2*length(level_values)),
                level = rep(rep(paste0("\\multicolumn{1}{c}{",level_values,"}"),each = 2),length(mods[[1]]$amce$Level)),
                "eff"=NA_character_,"eff_adj"=NA_character_,"fea"=NA_character_,"fea_adj"=NA_character_,"pro"=NA_character_,"pro_adj"=NA_character_)
  tab[as.vector(sapply(1:length(level_values),function(l){seq(2*l-1,nrow(tab),by=length(level_values)*2)})),-c(1:2,seq(4,ncol(tab),by=2))] <- matrix(mods_vecs$coefs,ncol = (ncol(tab)-2)/2)
  tab[as.vector(sapply(1:length(level_values),function(l){seq((2*l-1)+1,nrow(tab),by=length(level_values)*2)})),-c(1:2,seq(4,ncol(tab),by=2))] <- matrix(mods_vecs$se,ncol = (ncol(tab)-2)/2)
  tab[as.vector(sapply(1:length(level_values),function(l){seq(2*l-1,nrow(tab),by=length(level_values)*2)})),-c(1:2,seq(3,ncol(tab),by=2))] <- matrix(mods_vecs$coefs_adj,ncol = (ncol(tab)-2)/2)
  tab[as.vector(sapply(1:length(level_values),function(l){seq((2*l-1)+1,nrow(tab),by=length(level_values)*2)})),-c(1:2,seq(3,ncol(tab),by=2))] <- matrix(mods_vecs$se_adj,ncol = (ncol(tab)-2)/2)
  tab[-sapply(unique(tab$vars),function(x){min(which(tab$vars==x))}),1] <- ""
  tab[seq(2,nrow(tab),by=2),2] <- ""
  Nobs <- c("\\midrule \\multicolumn{1}{l}{N(observations)}","",rep(mods[[1]]$samplesize_estimates,2),rep(mods[[2]]$samplesize_estimates,2),rep(mods[[3]]$samplesize_estimates,2))
  Nresp <- c("\\multicolumn{1}{l}{N(respondents)}","",rep(mods[[1]]$respondents,2),rep(mods[[2]]$respondents,2),rep(mods[[3]]$respondents,2))
  Ntab <- rbind(Nobs,Nresp); colnames(Ntab) <- colnames(tab)
  tab <- rbind(tab,Ntab)
  return(tab)
}

# Highlight estimates significant at the 95% confidence interval
sig_diff_func <- function(tab){
  sig_diff <- bind_rows(
    tab_to_plotdat_func(tab,adjustment = "none") %>%
      group_by(vars, type) %>%
      mutate(ll95 = est - qnorm(0.975)*se,
             ul95 = est + qnorm(0.975)*se) %>%
      na.omit() %>%
      summarize(non_overlap = if_else(any(ll95>ul95 | ul95 < ll95),1,0),.groups = 'drop') %>%
      mutate(adj = "none"),
    tab_to_plotdat_func(tab,adjustment = "holm") %>%
      group_by(vars, type) %>%
      mutate(ll95 = est - qnorm(0.975)*se,
             ul95 = est + qnorm(0.975)*se) %>%
      na.omit() %>%
      summarize(non_overlap = if_else(any(ll95>ul95 | ul95 < ll95),1,0),.groups = 'drop') %>%
      mutate(adj = "holm"))
  
  if(any(sig_diff$non_overlap==1)){
    for(i in which(sig_diff$non_overlap==1)){
      sig_diff_var = paste0("\\textbf{",str_split(sig_diff$vars[i],pattern = ": ")[[1]][1],"}: ",str_split(sig_diff$vars[i],pattern = ": ")[[1]][2])
      sig_diff_out = if_else(sig_diff$type[i]=="Effective", if_else(sig_diff$adj[i] == "none","eff","eff_adj"),
                             if_else(sig_diff$type[i]=="Feasible", if_else(sig_diff$adj[i] == "none","fea","fea_adj"),
                                     if_else(sig_diff$adj[i] == "none","pro","pro_adj")))
      tab[which(tab$vars==sig_diff_var),sig_diff_out] <- paste0("\\textbf{",tab[which(tab$vars==sig_diff_var),sig_diff_out],"}")
    }
  }
  
  return(tab)
}

